import pandas as pd

# ========== Configuration ==========
input_path = r"C:\Users\Lenovo\Desktop\000\方案\无结构验证\02\融合dpo模型生成评估数据的方案.xlsx"   # input workbook (per the original note: prompt + 方案1 columns)
output_path = r"C:\Users\Lenovo\Desktop\000\价值维度匹配分析\模型生成方案维度占比\无结构的02\公众维度_dpo融合.xlsx"  # output workbook
# ===================================

# ====== Dimension -> keyword dictionary (example: public version; swap in the expert version if needed) ======
# Keys are dimension names; values are the keywords that signal that dimension.
# match_dimensions() tests keywords by substring containment, so an entry that
# contains a shorter keyword of the same dimension (e.g. "生活品质" next to
# "生活") never changes the outcome.
dim_dict = {
    "文化传承与历史保护": ["文化", "历史"],
    "配套设施改善": ["提升", "改造", "配套", "融合", "升级", "改善", "更新", "保护", "整合"],
    "整体发展与宜居生活": ["发展", "规划", "品质", "生活", "居住", "宜居", "活力", "绿化", "生活品质"],
    "空间利用与生态环境": ["用地", "空间", "生态", "园区", "土地", "片区", "空间布局", "环境", "景观"],
    "公共服务与社区生活": ["城市", "社区", "居民", "公共服务", "交通", "街区"],
    "土地与资源效率": ["优化", "利用效率", "效率"],
    "产业经济与创新发展": ["产业", "商业", "创新", "科技", "产业园", "企业", "产业结构", "工业"]
}

# 1. Load the data from the input workbook
df = pd.read_excel(input_path)

# 2. 给方案打标签
def match_dimensions(text, dim_dict):
    """Return the dimensions whose keywords occur in *text*.

    A dimension matches when at least one of its keywords is a substring of
    the text. Each dimension is reported at most once, in the iteration
    order of ``dim_dict``.

    Args:
        text: Cell value to inspect. Non-string values are converted with
            ``str()``; missing values (``None`` or a float NaN) match nothing.
        dim_dict: Mapping of dimension name to an iterable of keyword strings.

    Returns:
        A list of the matching dimension names (possibly empty).
    """
    # A missing cell must not be stringified to "nan"/"None": that text could
    # accidentally contain a short keyword and produce a bogus tag.
    # ``text != text`` is true only for NaN.
    if text is None or (isinstance(text, float) and text != text):
        return []

    haystack = str(text)  # convert once instead of once per keyword
    return [
        dim
        for dim, keywords in dim_dict.items()
        # any() stops at the first hit, so a dimension is never counted twice.
        if any(kw in haystack for kw in keywords)
    ]

# Tag every row: the list of dimensions whose keywords appear in its 方案1 text.
df["匹配维度"] = df["方案1"].apply(match_dimensions, args=(dim_dict,))

# 3. Dimension distribution: flatten the per-row lists into one sequence,
# then count how often each dimension occurs across all rows.
all_dims = [dim for row_dims in df["匹配维度"] for dim in row_dims]
dim_count = (
    pd.Series(all_dims)
    .value_counts()
    .rename_axis("维度")
    .reset_index(name="出现次数")
)

# Share of each dimension among all matches, in percent.
total_matches = dim_count["出现次数"].sum()
dim_count["占比(%)"] = dim_count["出现次数"] / total_matches * 100

# 4. Save the per-row tags and the summary table as two sheets of one workbook.
with pd.ExcelWriter(output_path) as writer:
    for sheet_name, frame in (("逐条结果", df), ("维度统计", dim_count)):
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

print("完成 ✅ 结果已保存到：", output_path)
